Loading the dataset

library(httr)
## 
## Attaching package: 'httr'
## The following object is masked from 'package:plotly':
## 
##     config
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
# Download every row behind a JSON endpoint by requesting it page by page.
#
# Each request asks for `chunk_size` rows ordered by zipcode, starting at the
# offset of the current page. Paging stops at the first page that comes back
# with fewer than `chunk_size` rows.
#
# Arguments:
#   url         Endpoint to query (single string).
#   chunk_size  Rows requested per page; defaults to 50000.
#
# Returns a list with one tibble per page, in download order. Callers are
# expected to combine the pages themselves (e.g. with bind_rows()).
#
# Stops with an error if any request returns an HTTP error status.
get_all_inspections <- function(url, chunk_size = 50000) {

  stopifnot(
    is.character(url), length(url) == 1,
    is.numeric(chunk_size), length(chunk_size) == 1, chunk_size >= 1
  )

  # Integers are written into the query string without scientific notation
  # (a double such as 1e5 would otherwise be sent as "1e+05").
  chunk_size <- as.integer(chunk_size)

  all_inspections <- list()

  loop_index <- 1
  DO_NEXT <- TRUE

  while (DO_NEXT) {
    message("Getting data, page ", loop_index)

    response <-
      GET(url,
          query = list(`$order` = "zipcode",
                       `$limit` = chunk_size,
                       `$offset` = as.integer((loop_index - 1) * chunk_size)
                       )
          )

    # Fail loudly on an HTTP error: otherwise the error payload would be
    # parsed as a short page, silently ending the loop with bad data.
    stop_for_status(response)

    page <-
      response %>%
      content("text", encoding = "UTF-8") %>%
      fromJSON() %>%
      as_tibble()

    all_inspections[[loop_index]] <- page

    # A full page means there may be more rows; a short page is the last one.
    DO_NEXT <- nrow(page) == chunk_size
    loop_index <- loop_index + 1
  }

  all_inspections

}

# NYC Open Data endpoint that serves the inspection records as JSON.
url <- "https://data.cityofnewyork.us/resource/43nn-pn8j.json"

# Fetch every page, then stack the per-page tibbles into one table.
nyc_inspections <- bind_rows(get_all_inspections(url))
## Getting data, page 1
## Getting data, page 2
## Getting data, page 3
## Getting data, page 4
## Getting data, page 5
## Getting data, page 6

Data cleaning

# Convert the score to numeric and the three date fields to Date, keep only
# rows that have a score, a borough and an inspection date, and drop rows that
# are exact duplicates of one another.
nyc_inspections_cleaned <- nyc_inspections %>%
  mutate(
    score = as.numeric(score),
    across(c(inspection_date, grade_date, record_date), as.Date)
  ) %>%
  filter(
    !is.na(score),
    !is.na(boro),
    !is.na(inspection_date)
  ) %>%
  distinct()

# Print the cleaned table.
nyc_inspections_cleaned
## # A tibble: 243,200 × 26
##    camis    boro     building street zipcode phone inspection_date critical_flag
##    <chr>    <chr>    <chr>    <chr>  <chr>   <chr> <date>          <chr>        
##  1 50147715 Manhatt… 2        W 69T… 10000   3474… 2024-04-23      Not Critical 
##  2 50132187 Manhatt… NKA      CENTR… 10000   6469… 2024-07-23      Critical     
##  3 50132187 Manhatt… NKA      CENTR… 10000   6469… 2023-02-27      Critical     
##  4 50132187 Manhatt… NKA      CENTR… 10000   6469… 2023-02-27      Not Critical 
##  5 50132187 Manhatt… NKA      CENTR… 10000   6469… 2024-07-23      Not Critical 
##  6 50132187 Manhatt… NKA      CENTR… 10000   6469… 2023-02-27      Not Critical 
##  7 50147715 Manhatt… 2        W 69T… 10000   3474… 2024-09-12      Not Critical 
##  8 50147715 Manhatt… 2        W 69T… 10000   3474… 2024-04-23      Critical     
##  9 50132187 Manhatt… NKA      CENTR… 10000   6469… 2023-02-27      Not Critical 
## 10 50147715 Manhatt… 2        W 69T… 10000   3474… 2024-04-23      Critical     
## # ℹ 243,190 more rows
## # ℹ 18 more variables: record_date <date>, dba <chr>,
## #   cuisine_description <chr>, action <chr>, violation_code <chr>,
## #   violation_description <chr>, score <dbl>, inspection_type <chr>,
## #   latitude <chr>, longitude <chr>, community_board <chr>,
## #   council_district <chr>, census_tract <chr>, bin <chr>, bbl <chr>,
## #   nta <chr>, grade <chr>, grade_date <date>

Column

Chart A — Scatterplot

# Interactive scatterplot of score against inspection date. Markers are
# coloured by score and show the borough and score as hover text.
# NOTE(review): the cleaned table can hold several rows with the same camis
# and inspection_date, so points may be overplotted -- confirm whether the
# data should be reduced to one row per inspection first.
nyc_inspections_cleaned %>%
  mutate(text_label = str_c("Borough:", boro, "\nScore:", score)) %>%
  plot_ly(
    x = ~inspection_date,
    y = ~score,
    type = "scatter",
    mode = "markers",
    color = ~score,
    text = ~text_label,
    alpha = 0.5
  ) %>%
  layout(
    title = "Scatterplot of Scores Over Inspection Dates",
    xaxis = list(title = "Inspection Date"),
    yaxis = list(title = "Score")
  )

Column

Chart B — Box plot

# Interactive boxplot of scores, one box per borough. fct_reorder() orders the
# borough levels by their scores so the boxes appear in that order.
nyc_inspections_cleaned %>%
  mutate(boro = fct_reorder(boro, score)) %>%
  plot_ly(
    y = ~score,
    color = ~boro,
    type = "box",
    colors = "viridis"
  ) %>%
  layout(
    title = "Boxplot of Scores by Borough",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Score")
  )

Chart C — Bar plot

# Interactive bar chart of the number of inspections in each borough, with
# bars ordered from fewest to most.
#
# The cleaned table can contain several rows for a single inspection (the same
# camis and inspection_date appear on multiple rows), so counting rows directly
# would count records rather than inspections. Reduce to one row per
# establishment / date / inspection type before counting.
# NOTE(review): assumes camis + inspection_date + inspection_type identifies
# one inspection -- confirm against the dataset's data dictionary.
nyc_inspections_cleaned %>%
  distinct(boro, camis, inspection_date, inspection_type) %>%
  count(boro) %>%
  mutate(boro = fct_reorder(boro, n)) %>%
  plot_ly(
    x = ~boro,
    y = ~n,
    color = ~boro,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Count of Inspections by Borough",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Inspection Count")
  )